In Atan1988/FlexFit: Fits Severity distributions for Insurance data

We first load IronFit library, and also define d, and p functions of a mixed lognormal distributions, with two sets of parameters

library(IronFit)
library(readxl)
library(tidyverse)
library(CompLognormal)
library(scales)
#two lognormal
pmixlnorm <- function(q, w1, alpha = 2000, meanlog1, sdlog1, meanlog2, sdlog2) {
  w1n <- pmin(exp(w1), alpha) / alpha
  w2n <- 1 - w1n

  # w1n <- w1 / (w1 + w2)
  # w2n <- w2 / (w1 + w2)

  w1n * plnorm(q, meanlog = meanlog1, sdlog = sdlog1) + 
      w2n * plnorm(q, meanlog = meanlog2, sdlog = sdlog2)
} 

dmixlnorm <- function(x, w1, alpha,  meanlog1, sdlog1, meanlog2, sdlog2) {
  w1n <- pmin(exp(w1), alpha) / alpha
  w2n <- 1 - w1n

  # w1n <- w1 / (w1 + w2)
  # w2n <- w2 / (w1 + w2)

    w1n * dlnorm(x, meanlog = meanlog1, sdlog = sdlog1) + 
      w2n * dlnorm(x, meanlog = meanlog2, sdlog = sdlog2)
}


pmixlnorm2 <- function(q, ws,  meanlogs, sdlogs) {
  wn <- exp(ws) / sum(exp(ws))

  z <- mapply(plnorm, meanlog = meanlogs, sdlog = sdlogs, 
                       MoreArgs = list(q = q), SIMPLIFY = F)
  as.vector( wn %*% matrix(unlist(z), ncol = length(q), byrow = T))
} 

dmixlnorm2 <- function(x, ws,  meanlogs, sdlogs ) {

  wn <- exp(ws) / sum(exp(ws))

  z <- mapply(dlnorm, meanlog = meanlogs, sdlog = sdlogs, 
                       MoreArgs = list(x = x), SIMPLIFY = F)
  as.vector( wn %*% matrix(unlist(z), ncol = length(x), byrow = T))
} 

pmixgamma2 <- function(q, ws,  shapes, scales) {
  wn <- exp(ws) / sum(exp(ws))

  z <- mapply(pgamma, shape = exp(shapes), rate = 1 / exp(scales), 
                       MoreArgs = list(q = q), SIMPLIFY = F)
  as.vector( wn %*% matrix(unlist(z), ncol = length(q), byrow = T))
}

dmixgamma2 <- function(x, ws,  shapes, scales) {
  wn <- exp(ws) / sum(exp(ws))

  z <- mapply(dgamma, shape = exp(shapes), rate = 1 / exp(scales), 
                       MoreArgs = list(x = x), SIMPLIFY = F)
  as.vector( wn %*% matrix(unlist(z), ncol = length(x), byrow = T))
}

pmixexp <- function(q, ws, scales) {
  wn <- exp(ws) / sum(exp(ws))

  z <- mapply(pexp, rate = 1 / exp(scales), 
                       MoreArgs = list(q = q), SIMPLIFY = F)
  as.vector( wn %*% matrix(unlist(z), ncol = length(q), byrow = T))
} 

dmixexp <- function(x, ws, scales) {
  wn <- exp(ws) / sum(exp(ws))

  z <- mapply(dexp, rate = 1 / exp(scales), 
                       MoreArgs = list(x = x), SIMPLIFY = F)
  as.vector( wn %*% matrix(unlist(z), ncol = length(x), byrow = T))
} 

pmixexp2 <- function(q, ws, rates) {
  wn <- exp(ws) / sum(exp(ws))

  z <- mapply(pexp, rate = rates, 
                       MoreArgs = list(q = q), SIMPLIFY = F)
  as.vector( wn %*% matrix(unlist(z), ncol = length(q), byrow = T))
} 

dmixexp2 <- function(x, ws, rates) {
  wn <- exp(ws) / sum(exp(ws))

  z <- mapply(dexp, rate = rates, 
                       MoreArgs = list(x = x), SIMPLIFY = F)
  as.vector( wn %*% matrix(unlist(z), ncol = length(x), byrow = T))
} 

pmixlnGM <- function(q, w1, w2, meanlog1, sdlog1, shape2, scale2) {
  w1 <- exp(w1)
  w2 <- exp(w2)

  w1n <- w1 / (w1 + w2)
  w2n <- w2 / (w1 + w2)

  w1n * plnorm(q, meanlog = meanlog1, sdlog = sdlog1) + 
      w2n * pgamma(q, shape = exp(shape2), scale = exp(scale2) )
}

dmixlnGM <- function(x, w1, w2, meanlog1, sdlog1, shape2, scale2) {
  w1 <- exp(w1)
  w2 <- exp(w2)

  w1n <- w1 / (w1 + w2)
  w2n <- w2 / (w1 + w2)

  w1n * dlnorm(x, meanlog = meanlog1, sdlog = sdlog1) + 
      w2n * dgamma(x, shape = exp(shape2), scale = exp(scale2) )
}

tpmixlnorm <- function(q, Att, w1, w2, meanlog1, sdlog1, meanlog2, sdlog2) {
  ptrunc(q, FUN = "mixlnorm", Att=Att, rTrunc = Inf, w1, w2, meanlog1, sdlog1, meanlog2, sdlog2)
}

tplnorm <- function(q, Att, meanlog, sdlog) {
  ptrunc(q, FUN = "lnorm", Att=Att, rTrunc = Inf, meanlog, sdlog)
}

tqlnorm <- function(p, Att, meanlog, sdlog) {
  qtrunc(p, FUN = "lnorm", Att=Att, rTrunc = Inf, meanlog, sdlog)
}


dclnlnB <- function(x, sdlog1, m2, s2, theta) {
  dcomplnorm(x, "lnorm", sigma = exp(sdlog1), 
             theta = exp(theta) * 1e6, meanlog = exp(m2), sdlog = exp(s2))
}

dclnexp <- function(x, sdlog1, m2, theta) {
  dcomplnorm(x, "exp", sigma = exp(sdlog1), 
             theta = exp(theta), rate = 1/exp(m2))
}

pclnexp <- function(x, sdlog1, m2, theta) {
  pcomplnorm(x, "exp", sigma = exp(sdlog1), 
             theta = exp(theta) ,  rate = 1/exp(m2))
}

qclnexp <- function(x, sdlog1, m2, theta) {
  qcomplnorm(x, "exp", sigma = exp(sdlog1), 
             theta = exp(theta), rate = 1/exp(m2))
}

pclnlnB <- function(q, sdlog1, m2, s2, theta) {
  pcomplnorm(q, "lnorm", sigma = exp(sdlog1),
             theta = exp(theta) * 1e6, meanlog = exp(m2), sdlog = exp(s2))
}

tpclnlnB <- function(q, Att, sdlog1, m2, s2, theta) {
  ptrunc(q, FUN = "clnlnB", Att=Att, rTrunc = Inf, sdlog1, m2, s2, theta)
}

qclnlnB <- function(p, sdlog1, m2, s2, theta) {
  qcomplnorm(p, "lnorm", sigma = exp(sdlog1),
             theta = exp(theta) * 1e6, meanlog = exp(m2), sdlog = exp(s2))
}

tqclnlnB <- function(p, Att, sdlog1, m2, s2, theta) {
  qtrunc(p, FUN = "clnlnB", Att=Att, rTrunc = Inf, sdlog1, m2, s2, theta)
}

rclnlnB <- function(n, sdlog1, m2, s2, theta) {
  rcomplnorm(n, "lnorm", sigma = exp(sdlog1), 
             theta = exp(theta) * 1e6, meanlog = exp(m2), sdlog = exp(s2))
}

Load the data in

# filepath<- "Z:\\Actuarial\\IronHealth\\Warehouse\\HPL\\2017\\updated LG\\New folder\\"
# filename<- "HPL warehouse study 2017-ILFs(net of SIR fit)AM.xlsm"
# fullfilepath <- paste0(filepath, filename)
# #hpl_g1_raw <- readxl::read_excel(fullfilepath, sheet="G1", range=cell_cols("Y:AC"))
# hpl_g2_raw <- readxl::read_excel(fullfilepath, sheet="G2", range=cellranger::cell_cols("Y:AC"))
# #hpl_g3_raw <- readxl::read_excel(fullfilepath, sheet="G3", range=cell_cols("Y:AC"))
# #hpl_g4_raw <- readxl::read_excel(fullfilepath, sheet="G4", range=cell_cols("Y:AC"))
data(G2)
head(hpl_g2_raw)

Fit the mixed lognormal

trunc <- 10e3
y <- hpl_g2_raw$`Trd Incd`[hpl_g2_raw$`Trd Incd` > trunc]

system.time(fit0 <- sevfit(y - trunc, Att = trunc, FUN = 'lnorm', ini = c( 12, 2),
       parnames = c('meanlog', 'sdlog'), gn_ll_mtd = NULL
       )
)

system.time(fit1 <- sevfit(y - trunc, Att = trunc, FUN = 'mixlnorm', ini = c(2,10, 3, 12, 2),
       parnames = c('w1', 'meanlog1', 'sdlog1', 'meanlog2', 'sdlog2')
       , fixedpar = 200, fixedparn = "alpha", gn_ll_mtd = NULL
       )
)

system.time(fit1b <- sevfit(y - trunc, Att = trunc, FUN = 'mixlnorm2', 
                ini = c(0.5, 0.5, 0.5, 5, 10, 12, 3, 3, 2 ),
       parnames = c('ws', 'meanlogs', 'sdlogs'), gn_ll_mtd = NULL
       )
)


system.time(fit2a <- sevfit(y - trunc, Att = trunc, FUN = 'mixexp', 
                ini = c(rep(0.5, 5), seq(4, 12, 2) ),
       parnames = c('ws', 'scales'), gn_ll_mtd = NULL
       )
)

system.time(fit2b <- sevfit(y - trunc, Att = trunc, FUN = 'mixexp', 
                ini = c(rep(0.5, 5), seq(10, 18, 2) ),
       parnames = c('ws', 'scales'), gn_ll_mtd = NULL
)
)


system.time(fit2c <- sevfit(y - trunc, Att = trunc, FUN = 'mixexp', 
                ini = c(rep(0.5, 6), seq(8, 18, 2) ),
       parnames = c('ws', 'scales'), gn_ll_mtd = NULL
       )
)

system.time(fit2c_k <- sevfit(y - trunc, Att = trunc, FUN = 'mixexp2', 
                ini = c(rep(0.5, 6), 1 / exp(seq(8, 18, 2)) ),
       parnames = c('ws', 'rates'), gn_ll_mtd = NULL
       )
)


system.time(fit2c_pr <- sevfit(y, Att = trunc, FUN = 'mixexp', 
                ini = list(ws = rep(1/6, 6), 
                           scales = seq(8, 18, 2)
                           ),
       prior = alist(ws ~ dirichlet(seq(6, 1, -1)), scales ~ norm(12, 10))
       )
)

system.time(fit2d <- sevfit(y - trunc, Att = trunc, FUN = 'mixexp', 
                ini = c(rep(0.5, 8), seq(4, 18, 2) ),
       parnames = c('ws', 'scales'), gn_ll_mtd = NULL
       )
)



system.time(fit3a <- sevfit(y - trunc, Att = trunc, FUN = 'mixgamma2', 
                ini = c(rep(1/6, 4), rep(0, 4),  seq(12, 18, 2)),
       parnames = c('ws', 'shapes', 'scales'), gn_ll_mtd = NULL,
       prior = alist(ws ~ dirichlet(seq(12, 3, -3)), shapes ~ norm(0, 0.5), scales ~ norm(10, 5))
       )
)
logLik(fit3a, y = y, Att = trunc)

system.time(fit_plotdf0 <- qpPlot_prepare(fit0, y = y, Att = trunc))
system.time(fit_plotdf1 <- qpPlot_prepare(fit1, y = y, Att = trunc))
system.time(fit_plotdf1b <- qpPlot_prepare(fit1b, y = y, Att = trunc))
system.time(fit_plotdf2a <- qpPlot_prepare(fit2a, y = y, Att = trunc))
system.time(fit_plotdf2b <- qpPlot_prepare(fit2b, y = y, Att = trunc))
system.time(fit_plotdf2c <- qpPlot_prepare(fit2c, y = y, Att = trunc))
system.time(fit_plotdf2c_pr <- qpPlot_prepare(fit2c_pr, y = y, Att = trunc))

sum(with(fit_plotdf1$qq_df, abs(act_ys - exp_ys)))/ sum(fit_plotdf1$qq_df$act_ys)
sum(with(fit_plotdf2b$qq_df, abs(act_ys - exp_ys)))/ sum(fit_plotdf2b$qq_df$act_ys)
sum(with(fit_plotdf2c$qq_df, abs(act_ys - exp_ys)))/ sum(fit_plotdf2c$qq_df$act_ys)